Dependencies

library(tidyverse)
library(GGally) # for ggpairs()
library(pdp) # for partial dependence plots
library(gridExtra)
library(knitr)
library(patchwork) # for figure insets etc. 
library(ggtext)
library(betareg)
library(ggplot2)
library(terra)
library(sf)
theme_set(theme_classic())

read in data

Data compiled in the prepDataForModels.R script

modDat <- readRDS("../../../Data_processed/CoverData/DataForModels_spatiallyAveraged_withSoils_noSf.rds")

Here are the climate variables (30-year lag) we could potentially use in the models – however I think we should remove the 5th and 95th percentile variables here, since we’re interested in long-term averages rather than extremes

kable(modDat %>% 
        select(swe_meanAnnAvg_30yr:durationFrostFreeDays_meanAnnAvg_30yr) %>% 
        names()
)
x
swe_meanAnnAvg_30yr
tmin_meanAnnAvg_30yr
tmax_meanAnnAvg_30yr
tmean_meanAnnAvg_30yr
vp_meanAnnAvg_30yr
prcp_meanAnnTotal_30yr
T_warmestMonth_meanAnnAvg_30yr
T_coldestMonth_meanAnnAvg_30yr
precip_wettestMonth_meanAnnAvg_30yr
precip_driestMonth_meanAnnAvg_30yr
precip_Seasonality_meanAnnAvg_30yr
PrecipTempCorr_meanAnnAvg_30yr
aboveFreezing_month_meanAnnAvg_30yr
isothermality_meanAnnAvg_30yr
annWaterDeficit_meanAnnAvg_30yr
annWetDegDays_meanAnnAvg_30yr
annVPD_mean_meanAnnAvg_30yr
annVPD_max_meanAnnAvg_30yr
annVPD_min_meanAnnAvg_30yr
annVPD_max_95percentile_30yr
annWaterDeficit_95percentile_30yr
annWetDegDays_5percentile_30yr
durationFrostFreeDays_5percentile_30yr
durationFrostFreeDays_meanAnnAvg_30yr

Here are the weather anomaly variables (5-year lag) we could potentially use in the models

kable(modDat %>% 
        select(swe_meanAnn_5yrAnom:durationFrostFreeDays_meanAnnAvg_5yrAnom) %>% 
        names()
)
x
swe_meanAnn_5yrAnom
tmean_meanAnnAvg_5yrAnom
tmin_meanAnnAvg_5yrAnom
tmax_meanAnnAvg_5yrAnom
vp_meanAnnAvg_5yrAnom
prcp_meanAnnTotal_5yrAnom
T_warmestMonth_meanAnnAvg_5yrAnom
T_coldestMonth_meanAnnAvg_5yrAnom
precip_wettestMonth_meanAnnAvg_5yrAnom
precip_driestMonth_meanAnnAvg_5yrAnom
precip_Seasonality_meanAnnAvg_5yrAnom
PrecipTempCorr_meanAnnAvg_5yrAnom
aboveFreezing_month_meanAnnAvg_5yrAnom
isothermality_meanAnnAvg_5yrAnom
annWaterDeficit_meanAnnAvg_5yrAnom
annWetDegDays_meanAnnAvg_5yrAnom
annVPD_mean_meanAnnAvg_5yrAnom
annVPD_min_meanAnnAvg_5yrAnom
annVPD_max_meanAnnAvg_5yrAnom
annVPD_max_95percentile_5yrAnom
annWaterDeficit_95percentile_5yrAnom
annWetDegDays_5percentile_5yrAnom
durationFrostFreeDays_5percentile_5yrAnom
durationFrostFreeDays_meanAnnAvg_5yrAnom

And here are the soil variables we could potentially use in the models

kable(modDat %>% 
        select(soilDepth:totalAvailableWaterHoldingCapacity) %>% 
        names()
)
x
soilDepth
surfaceClay_perc
avgSandPerc_acrossDepth
avgCoarsePerc_acrossDepth
avgOrganicCarbonPerc_0_3cm
totalAvailableWaterHoldingCapacity

Correlation of potential weather predictors

This is using a subset of the data (50,000 rows), just for runtime purposes

Below is the correlation between only climate predictors that are averaged across 30 years * excluded 95th and 5th percentile variables * Dropped tmin, tmax, t_warmest month, and t_coldest month and replaced w/ MAT * Dropped month above freezing (highly correlated with frost-free days in all ecoregions)

# function to add colors to correlation plot
 my_fn <- function(data, mapping, method="p", use="pairwise", ...){

              # grab data
              x <- eval_data_col(data, mapping$x)
              y <- eval_data_col(data, mapping$y)

              # calculate correlation
              corr <- cor(x, y, method=method, use=use)

              # calculate colour based on correlation value
              # Here I have set a correlation of minus one to blue, 
              # zero to white, and one to red 
              # Change this to suit: possibly extend to add as an argument of `my_fn`
              colFn <- colorRampPalette(c("red", "white", "blue"), interpolate ='spline')
              fill <- colFn(100)[findInterval(corr, seq(-1, 1, length=100))]

              ggally_cor(data = data, mapping = mapping, stars = FALSE, 
                         digits = 2, colour = I("black"),...) + 
                theme_void() +
                theme(panel.background = element_rect(fill=fill))
              
 }

(corrPlot_30yrMean <- 
   modDat %>% 
   select(c(swe_meanAnnAvg_30yr, tmean_meanAnnAvg_30yr:prcp_meanAnnTotal_30yr,
            precip_wettestMonth_meanAnnAvg_30yr:PrecipTempCorr_meanAnnAvg_30yr, 
            isothermality_meanAnnAvg_30yr:annVPD_min_meanAnnAvg_30yr, 
            durationFrostFreeDays_meanAnnAvg_30yr)) %>% 
  rename("swe" = swe_meanAnnAvg_30yr,  "tmean" = tmean_meanAnnAvg_30yr,
         "vp" = vp_meanAnnAvg_30yr, "prcp" = prcp_meanAnnTotal_30yr, 
         "prcp \n Wettest" = precip_wettestMonth_meanAnnAvg_30yr, "prcpDriest" = precip_driestMonth_meanAnnAvg_30yr, 
         "prcp \n Seasonality" = precip_Seasonality_meanAnnAvg_30yr, "prcp \n TempCorr" = PrecipTempCorr_meanAnnAvg_30yr,  "isothermality" = isothermality_meanAnnAvg_30yr,
         "Wat \n Deficit" = annWaterDeficit_meanAnnAvg_30yr, "Wet \n DegDays" = annWetDegDays_meanAnnAvg_30yr, 
         "VPD \n mean" = annVPD_mean_meanAnnAvg_30yr, "VPD \n max" = annVPD_max_meanAnnAvg_30yr, "VPD \n min" = annVPD_min_meanAnnAvg_30yr,
         "frost \n FreeDays" = durationFrostFreeDays_meanAnnAvg_30yr) %>% 
  slice_sample(n = 5e4) %>% 
  #select(-matches("_")) %>% 
ggpairs( upper = list(continuous = my_fn), lower = list(continuous = GGally::wrap("points", alpha = 0.1, size=0.2)), progress = FALSE))

Correlations at Ecoregion scales

Here are the ecoregions, which are grouped Level 1 Ecoregions

# get data for model fitting
modRegions <- readRDS("../../../Data_processed/CoverData/DataForModels_withSubEcoreg.rds")
# also get data for ecoregions
ecoRegions <- sf::st_read("../../../Data_raw/Level1Ecoregions/", layer = "NA_CEC_Eco_Level1")
## Reading layer `NA_CEC_Eco_Level1' from data source 
##   `/Users/astears/Documents/Dropbox_static/Work/NAU_USGS_postdoc/PED_vegClimModels/Data_raw/Level1Ecoregions' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2140 features and 5 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -4334052 ymin: -3313739 xmax: 3324076 ymax: 4267265
## Projected CRS: Sphere_ARC_INFO_Lambert_Azimuthal_Equal_Area
# filter the ecoregion data by CONUS boundary
# add the ecsoregions together as we've done in the model data
CONUS <- st_read("../../../Data_raw/CONUS_extent", "CONUS_boundary") %>% 
  st_transform(st_crs(ecoRegions)) %>% 
  st_make_valid()
## Reading layer `CONUS_boundary' from data source 
##   `/Users/astears/Documents/Dropbox_static/Work/NAU_USGS_postdoc/PED_vegClimModels/Data_raw/CONUS_extent' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 10 features and 1 field
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -124.6813 ymin: 25.12993 xmax: -67.00742 ymax: 49.38323
## Geodetic CRS:  GCS_unknown
# trim the ecoregions by CONUS extent
ecoRegCrop <- ecoRegions %>% 
  st_make_valid() %>% 
  st_intersection( CONUS)

unique(st_drop_geometry(modRegions)[,c("NA_L1NAME", "newRegion")])
##                              NA_L1NAME     newRegion
## 1      NORTHWESTERN FORESTED MOUNTAINS    westForest
## 2               NORTH AMERICAN DESERTS dryShrubGrass
## 3                    TEMPERATE SIERRAS    westForest
## 56         SOUTHERN SEMIARID HIGHLANDS dryShrubGrass
## 1119                      GREAT PLAINS dryShrubGrass
## 8227          MEDITERRANEAN CALIFORNIA dryShrubGrass
## 8248          MARINE WEST COAST FOREST    westForest
## 8315                              <NA>          <NA>
## 9672         EASTERN TEMPERATE FORESTS    eastForest
## 15184                 NORTHERN FORESTS    eastForest
## 77979             TROPICAL WET FORESTS    eastForest
## 100882                           WATER          <NA>
ecoRegCrop$newRegion <- NA
ecoRegCrop[ecoRegCrop$NA_L1NAME %in% c("NORTHWESTERN FORESTED MOUNTAINS", "TEMPERATE SIERRAS", "MARINE WEST COAST FOREST"), "newRegion"] <- "Western Forests"

ecoRegCrop[ecoRegCrop$NA_L1NAME %in% c("EASTERN TEMPERATE FORESTS", "NORTHERN FORESTS", "TROPICAL WET FORESTS"), "newRegion"] <- "Eastern Forests"

ecoRegCrop[ecoRegCrop$NA_L1NAME %in% c("NORTH AMERICAN DESERTS", "SOUTHERN SEMIARID HIGHLANDS", "GREAT PLAINS", "MEDITERRANEAN CALIFORNIA"), "newRegion"] <- "Grass and Shrubland"

ecoReg <- ecoRegCrop %>% 
  aggregate(by = list(ecoRegCrop$newRegion),
            FUN = unique,
            do_union = TRUE)
saveRDS(ecoReg, "../../../Data_processed/CoverData/ecoRegionExtents.RDS")

ggplot(data = ecoReg) + 
  geom_sf(aes(fill = newRegion)) + 
  scale_fill_discrete(guide = 
  guide_legend(title = c("Ecoregion")))

# pdf(file = "../../../figures/EcoregionMap.pdf")
# ggplot(data = ecoReg) + 
#   geom_sf(aes(fill = newRegion)) + 
#   scale_fill_discrete(guide = 
#   guide_legend(title = c("Ecoregion")))
# dev.off()

Dry shrublands and grasslands

#modDat_ecoregions <- readRDS("../../../data/DataForModels_withEcoregion.rds")
# just use modDat data
# get data 
 plotDat <- 
   modDat %>% 
   filter(newRegion == "dryShrubGrass") %>% 
   #st_drop_geometry() %>% 
   select(c(swe_meanAnnAvg_30yr, tmean_meanAnnAvg_30yr:prcp_meanAnnTotal_30yr,
            precip_wettestMonth_meanAnnAvg_30yr:PrecipTempCorr_meanAnnAvg_30yr, 
            isothermality_meanAnnAvg_30yr:annVPD_min_meanAnnAvg_30yr, 
            durationFrostFreeDays_meanAnnAvg_30yr)) %>% 
  rename("swe" = swe_meanAnnAvg_30yr,  "tmean" = tmean_meanAnnAvg_30yr,
         "vp" = vp_meanAnnAvg_30yr, "prcp" = prcp_meanAnnTotal_30yr, 
         "prcp \n Wettest" = precip_wettestMonth_meanAnnAvg_30yr, "prcpDriest" = precip_driestMonth_meanAnnAvg_30yr, 
         "prcp \n Seasonality" = precip_Seasonality_meanAnnAvg_30yr, "prcp \n TempCorr" = PrecipTempCorr_meanAnnAvg_30yr,  "isothermality" = isothermality_meanAnnAvg_30yr,
         "Wat \n Deficit" = annWaterDeficit_meanAnnAvg_30yr, "Wet \n DegDays" = annWetDegDays_meanAnnAvg_30yr, 
         "VPD \n mean" = annVPD_mean_meanAnnAvg_30yr, "VPD \n max" = annVPD_max_meanAnnAvg_30yr, "VPD \n min" = annVPD_min_meanAnnAvg_30yr,
         "frost \n FreeDays" = durationFrostFreeDays_meanAnnAvg_30yr) %>% 
     drop_na(swe, tmean, prcp,  "prcp \n TempCorr", isothermality, "Wat \n Deficit", 
       "Wet \n DegDays", "VPD \n max" )  %>% 
  slice_sample(n = 5e5) 
  #select(-matches("_")) %>% 
 
  p1 <- ggpairs(plotDat, upper = list(continuous = my_fn),
                lower = list(continuous = GGally::wrap("points", alpha = 0.1, size=0.2)), title = "Dry shrubland and grassland", progress = FALSE)
print(p1)

Eastern forests

#modDat_ecoregions <- readRDS("../../../data/DataForModels_withEcoregion.rds")
# just use modDat data
# get data 
 plotDat <- 
   modDat %>% 
   filter(newRegion == "eastForest") %>% 
   #st_drop_geometry() %>% 
   select(c(swe_meanAnnAvg_30yr, tmean_meanAnnAvg_30yr:prcp_meanAnnTotal_30yr,
            precip_wettestMonth_meanAnnAvg_30yr:PrecipTempCorr_meanAnnAvg_30yr, 
            isothermality_meanAnnAvg_30yr:annVPD_min_meanAnnAvg_30yr, 
            durationFrostFreeDays_meanAnnAvg_30yr)) %>% 
  rename("swe" = swe_meanAnnAvg_30yr,  "tmean" = tmean_meanAnnAvg_30yr,
         "vp" = vp_meanAnnAvg_30yr, "prcp" = prcp_meanAnnTotal_30yr, 
         "prcp \n Wettest" = precip_wettestMonth_meanAnnAvg_30yr, "prcpDriest" = precip_driestMonth_meanAnnAvg_30yr, 
         "prcp \n Seasonality" = precip_Seasonality_meanAnnAvg_30yr, "prcp \n TempCorr" = PrecipTempCorr_meanAnnAvg_30yr,  "isothermality" = isothermality_meanAnnAvg_30yr,
         "Wat \n Deficit" = annWaterDeficit_meanAnnAvg_30yr, "Wet \n DegDays" = annWetDegDays_meanAnnAvg_30yr, 
         "VPD \n mean" = annVPD_mean_meanAnnAvg_30yr, "VPD \n max" = annVPD_max_meanAnnAvg_30yr, "VPD \n min" = annVPD_min_meanAnnAvg_30yr,
         "frost \n FreeDays" = durationFrostFreeDays_meanAnnAvg_30yr) %>% 
     drop_na(swe, tmean, prcp,  "prcp \n TempCorr", isothermality, "Wat \n Deficit", 
       "Wet \n DegDays", "VPD \n max" ) %>% 
  slice_sample(n = 5e5) 
  #select(-matches("_")) %>% 
 
  p1 <- ggpairs(plotDat, upper = list(continuous = my_fn),
                lower = list(continuous = GGally::wrap("points", alpha = 0.1, size=0.2)), 
                title = "Eastern Forests", progress = FALSE)
print(p1)

Western forests

#modDat_ecoregions <- readRDS("../../../data/DataForModels_withEcoregion.rds")
# just use modDat data
# get data 
 plotDat <- 
   modDat %>% 
   filter(newRegion == "westForest") %>% 
   #st_drop_geometry() %>% 
   select(c(swe_meanAnnAvg_30yr, tmean_meanAnnAvg_30yr:prcp_meanAnnTotal_30yr,
            precip_wettestMonth_meanAnnAvg_30yr:annVPD_min_meanAnnAvg_30yr, durationFrostFreeDays_meanAnnAvg_30yr)) %>% 
  rename("swe" = swe_meanAnnAvg_30yr,  "tmean" = tmean_meanAnnAvg_30yr,
         "vp" = vp_meanAnnAvg_30yr, "prcp" = prcp_meanAnnTotal_30yr, 
         "prcp \n Wettest" = precip_wettestMonth_meanAnnAvg_30yr, "prcpDriest" = precip_driestMonth_meanAnnAvg_30yr, 
         "prcp \n Seasonality" = precip_Seasonality_meanAnnAvg_30yr, "prcp \n TempCorr" = PrecipTempCorr_meanAnnAvg_30yr, 
         "abvFreeze \n Month" = aboveFreezing_month_meanAnnAvg_30yr, "isothermality" = isothermality_meanAnnAvg_30yr,
         "Wat \n Deficit" = annWaterDeficit_meanAnnAvg_30yr, "Wet \n DegDays" = annWetDegDays_meanAnnAvg_30yr, 
         "VPD \n mean" = annVPD_mean_meanAnnAvg_30yr, "VPD \n max" = annVPD_max_meanAnnAvg_30yr, "VPD \n min" = annVPD_min_meanAnnAvg_30yr,
         "frost \n FreeDays" = durationFrostFreeDays_meanAnnAvg_30yr) %>% 
     drop_na(swe, tmean, prcp,  "prcp \n TempCorr", isothermality, "Wat \n Deficit", 
       "Wet \n DegDays", "VPD \n max" )  %>% 
  slice_sample(n = 5e5) 
  #select(-matches("_")) %>% 
 
  p1 <- ggpairs(plotDat, upper = list(continuous = my_fn),
                lower = list(continuous = GGally::wrap("points", alpha = 0.1, size=0.2)), 
                title = "Western Forests", progress = FALSE)
print(p1)